from utils import paddle_aux
import os
import paddle
"""
Train a YOLOv5 model on a custom dataset

Usage:
    $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640
"""
import argparse
import logging
import math
import random
import sys
import time
from copy import deepcopy
from pathlib import Path
import numpy as np
import yaml
from tqdm import tqdm
import wandb
import val
from models.experimental import attempt_load
from models.yolo import Model
from utils.autoanchor import check_anchors
from utils.autobatch import check_train_batch_size
from utils.datasets import create_dataloader
from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, check_file, check_yaml, check_suffix, print_args, print_mutation, set_logging, one_cycle, colorstr, methods
from utils.downloads import attempt_download
from utils.loss import ComputeLoss
from utils.plots import plot_labels, plot_evolve
from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, intersect_dicts, select_device, torch_distributed_zero_first
from utils.loggers.wandb.wandb_utils import check_wandb_resume
from utils.metrics import fitness
from utils.loggers import Loggers
from utils.cawb import CosineAnnealingWarmbootingLR
from utils.callbacks import Callbacks
# Resolve this script's location and make its directory importable.
FILE = Path(__file__).resolve()
ROOT = FILE.parent
if sys.path.count(str(ROOT)) == 0:
    sys.path.append(str(ROOT))
# From here on ROOT is expressed relative to the current working directory.
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))
LOGGER = logging.getLogger(__name__)
# Distributed-run identifiers read from the environment; an unset variable
# yields -1 for the ranks and 1 for the world size.
LOCAL_RANK = int(os.environ.get('LOCAL_RANK', -1))
RANK = int(os.environ.get('RANK', -1))
WORLD_SIZE = int(os.environ.get('WORLD_SIZE', 1))


def _build_optimizer(hyp, g0, g1, g2, use_adam, lr, momentum):
    """Create the optimizer over the three YOLOv5 parameter groups.

    Paddle optimizers have no ``add_param_group``; the groups are handed to
    the constructor as a list of dicts instead.

    Args:
        hyp: Hyperparameter dict; only ``hyp['weight_decay']`` is read.
        g0: BatchNorm2D weights (no weight decay).
        g1: Remaining layer weights; the only group given weight decay.
        g2: Biases (no weight decay).
        use_adam: Build ``paddle.optimizer.Adam`` when true, otherwise
            ``paddle.optimizer.Momentum`` with Nesterov momentum.
        lr: Base learning rate.
        momentum: Momentum value (used as ``beta1`` for Adam).

    Returns:
        The constructed Paddle optimizer.
    """
    # Group order matters: the warm-up code treats index 2 as the bias group.
    groups = [
        {'params': g0},
        {'params': g1, 'weight_decay': hyp['weight_decay']},
        {'params': g2},
    ]
    if use_adam:
        return paddle.optimizer.Adam(parameters=groups, learning_rate=lr,
            beta1=momentum, beta2=0.999, weight_decay=0.0)
    return paddle.optimizer.Momentum(learning_rate=lr, momentum=momentum,
        parameters=groups, use_nesterov=True)


def train(hyp, opt, device, callbacks):
    """Train a YOLOv5 model and return the last validation results.

    Args:
        hyp: Hyperparameter dict, or the path of a YAML file holding one.
        opt: Options namespace (as produced by ``parse_opt``).
        device: Target device, as returned by ``select_device`` in ``main``.
        callbacks: Callback registry. Logger hooks are registered on it and
            training events are fired through ``callbacks.run``.

    Returns:
        The ``results`` of the most recent ``val.run`` call, or a tuple of
        seven zeros when validation never ran in this process.

    Side effects:
        Writes ``hyp.yaml``, ``opt.yaml`` and checkpoints under
        ``opt.save_dir``; scales several entries of ``hyp`` in place.
    """
    save_dir = Path(opt.save_dir)
    epochs, batch_size, weights = opt.epochs, opt.batch_size, opt.weights
    single_cls, evolve, data, cfg = (opt.single_cls, opt.evolve, opt.data,
        opt.cfg)
    resume, noval, nosave = opt.resume, opt.noval, opt.nosave
    workers, freeze = opt.workers, opt.freeze

    # Directories (during evolution only the run directory is created).
    w = save_dir / 'weights'
    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)
    last, best = w / 'last.pt', w / 'best.pt'

    # Hyperparameters
    if isinstance(hyp, str):
        with open(hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)
    LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k,
        v in hyp.items()))

    # Persist the run settings next to the weights.
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.safe_dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.safe_dump(vars(opt), f, sort_keys=False)
    data_dict = None

    # Loggers are only created on the main process (RANK -1 or 0).
    if RANK in [-1, 0]:
        loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)
        if loggers.wandb:
            data_dict = loggers.wandb.data_dict
            if resume:
                weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp
        for k in methods(loggers):
            callbacks.register_action(k, callback=getattr(loggers, k))

    # Config
    plots = not evolve
    # NOTE(review): assumes `device` exposes a torch-style `.type` attribute;
    # confirm this holds for the paddle.CUDAPlace that main() passes in DDP.
    cuda = device.type != 'cpu'
    init_seeds(1 + RANK)
    with torch_distributed_zero_first(LOCAL_RANK):
        data_dict = data_dict or check_dataset(data)
    train_path, val_path = data_dict['train'], data_dict['val']
    nc = 1 if single_cls else int(data_dict['nc'])
    names = ['item'] if single_cls and len(data_dict['names']
        ) != 1 else data_dict['names']
    assert len(names
        ) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'
    is_coco = data.endswith('coco.yaml') and nc == 80

    # Model
    check_suffix(weights, '.pt')
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(LOCAL_RANK):
            weights = attempt_download(weights)
        ckpt = paddle.load(path=str(weights))
        model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.
            get('anchors')).to(device)
        # Anchors are not transferred when a new cfg/anchor count is given,
        # unless resuming.
        exclude = ['anchor'] if (cfg or hyp.get('anchors')
            ) and not resume else []
        csd = ckpt['model'].astype(dtype='float32').state_dict()
        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)
        model.set_state_dict(state_dict=csd)
        LOGGER.info(
            f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}'
            )
    else:
        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)

    # Freeze the first `freeze` layers by name prefix.
    freeze = [f'model.{x}.' for x in range(freeze)]
    for k, v in model.named_parameters():
        v.stop_gradient = False
        if any(x in k for x in freeze):
            print(f'freezing {k}')
            v.stop_gradient = True

    # Image size and batch size
    gs = max(int(model.stride.max()), 32)
    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)
    if RANK == -1 and batch_size == -1:
        batch_size = check_train_batch_size(model, imgsz)

    # Training data
    train_loader, dataset = create_dataloader(train_path, imgsz, batch_size //
        WORLD_SIZE, gs, single_cls, hyp=hyp, augment=True, cache=opt.cache,
        rect=opt.rect, rank=LOCAL_RANK, workers=workers, image_weights=opt.
        image_weights, quad=opt.quad, prefix=colorstr('train: '))
    mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())
    nb = len(train_loader)
    assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'

    # Optimizer: gradients are accumulated up to a nominal batch size of 64
    # and weight decay is scaled to the effective batch size.
    nbs = 64
    accumulate = max(round(nbs / batch_size), 1)
    hyp['weight_decay'] *= batch_size * accumulate / nbs
    LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")
    g0, g1, g2 = [], [], []
    # The class to test against is EagerParamBase itself; `.from_tensor` is a
    # constructor method and makes isinstance() raise TypeError.
    param_cls = paddle.base.framework.EagerParamBase
    for v in model.sublayers():
        if hasattr(v, 'bias') and isinstance(v.bias, param_cls):
            g2.append(v.bias)
        if isinstance(v, paddle.nn.BatchNorm2D):
            g0.append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, param_cls):
            g1.append(v.weight)
    optimizer = _build_optimizer(hyp, g0, g1, g2, use_adam=opt.adam, lr=hyp[
        'lr0'], momentum=hyp['momentum'])
    LOGGER.info(
        f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups {len(g0)} weight, {len(g1)} weight (no decay), {len(g2)} bias"
        )

    # Scheduler
    if opt.linear_lr:
        lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf']
        tmp_lr = paddle.optimizer.lr.LambdaDecay(lr_lambda=lf,
            learning_rate=optimizer.get_lr())
        optimizer.set_lr_scheduler(tmp_lr)
        scheduler = tmp_lr
    else:
        lf = lambda x, y=opt.epochs: ((1 + math.cos(x * math.pi / y)) / 2
            ) ** 1.0 * (1.0 - hyp['lrf']) + hyp['lrf']
        scheduler = CosineAnnealingWarmbootingLR(optimizer, epochs=opt.
            epochs, steps=opt.cawb_steps, step_scale=0.7, lf=lf, batchs=len
            (train_loader), warmup_epoch=hyp['warmup_epochs'], epoch_scale=4.0)

    # EMA is only kept on the main process.
    ema = ModelEMA(model) if RANK in [-1, 0] else None

    # Resume state from the checkpoint
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        if ckpt['optimizer'] is not None:
            optimizer.set_state_dict(state_dict=ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']
        if ema and ckpt.get('ema'):
            # Paddle layers load weights via set_state_dict (as used for the
            # model above); they have no load_state_dict.
            ema.ema.set_state_dict(state_dict=ckpt['ema'].astype(dtype=
                'float32').state_dict())
            ema.updates = ckpt['updates']
        start_epoch = ckpt['epoch'] + 1
        if resume:
            assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
        if epochs < start_epoch:
            LOGGER.info(
                f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs."
                )
            epochs += ckpt['epoch']
        del ckpt, csd

    # Single-process multi-GPU
    if cuda and RANK == -1 and paddle.device.cuda.device_count() > 1:
        logging.warning(
            'DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
            'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.'
            )
        model = paddle.DataParallel(layers=model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and RANK != -1:
        model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(layer=model).to(
            device)
        LOGGER.info('Using SyncBatchNorm()')

    # Validation loader and pre-training checks (main process only)
    if RANK in [-1, 0]:
        val_loader = create_dataloader(val_path, imgsz, batch_size //
            WORLD_SIZE * 2, gs, single_cls, hyp=hyp, cache=None if noval else
            opt.cache, rect=True, rank=-1, workers=workers, pad=0.5, prefix
            =colorstr('val: '))[0]
        if not resume:
            labels = np.concatenate(dataset.labels, 0)
            if plots:
                plot_labels(labels, names, save_dir)
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp['anchor_t'],
                    imgsz=imgsz)
            # Round-trip through float16 to reduce anchor precision.
            model.astype(dtype='float16').astype(dtype='float32')
        callbacks.run('on_pretrain_routine_end')

    # Distributed data parallel. paddle.DataParallel takes no torch-style
    # device_ids/output_device keywords; the device is selected in main().
    if cuda and RANK != -1:
        model = paddle.DataParallel(layers=model)

    # Scale loss gains to the number of detection layers, classes and image
    # size, then attach run attributes to the model.
    nl = de_parallel(model).model[-1].nl
    hyp['box'] *= 3.0 / nl
    hyp['cls'] *= nc / 80.0 * 3.0 / nl
    hyp['obj'] *= (imgsz / 640) ** 2 * 3.0 / nl
    hyp['label_smoothing'] = opt.label_smoothing
    model.nc = nc
    model.hyp = hyp
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device
        ) * nc
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # warm-up iterations
    last_opt_step = -1
    maps = np.zeros(nc)
    results = 0, 0, 0, 0, 0, 0, 0
    scheduler.last_epoch = start_epoch - 1
    scaler = paddle.amp.GradScaler(enable=cuda, incr_every_n_steps=2000,
        init_loss_scaling=65536.0)
    stopper = EarlyStopping(patience=opt.patience)
    compute_loss = ComputeLoss(model)
    LOGGER.info(
        f'Image sizes {imgsz} train, {imgsz} val\n'
        f'Using {train_loader.num_workers} dataloader workers\n'
        f"Logging results to {colorstr('bold', save_dir)}\n"
        f'Starting training for {epochs} epochs...'
        )
    for epoch in range(start_epoch, epochs):
        if epoch == 100 and not resume:
            # At epoch 100 of a fresh run the optimizer is replaced by a
            # Nesterov-momentum one with fixed settings.
            optimizer = _build_optimizer(hyp, g0, g1, g2, use_adam=False,
                lr=0.01, momentum=0.937)
            LOGGER.info(
                f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups {len(g0)} weight, {len(g1)} weight (no decay), {len(g2)} bias"
                )
            del g0, g1, g2
            if opt.linear_lr:
                # NOTE(review): the scheduler is not rebuilt in this branch,
                # so it still refers to the previous optimizer; confirm
                # whether that is intended.
                lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']
                    ) + hyp['lrf']
            else:
                lf = lambda x, y=opt.epochs: ((1 + math.cos(x * math.pi / y
                    )) / 2) ** 1.0 * (1.0 - hyp['lrf']) + hyp['lrf']
                scheduler = CosineAnnealingWarmbootingLR(optimizer, epochs=
                    opt.epochs, steps=opt.cawb_steps, step_scale=0.7, lf=lf,
                    batchs=len(train_loader), warmup_epoch=hyp[
                    'warmup_epochs'], epoch_scale=4.0)
        model.train()

        # Optionally resample images, favouring classes with low mAP.
        if opt.image_weights:
            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc
            iw = labels_to_image_weights(dataset.labels, nc=nc,
                class_weights=cw)
            dataset.indices = random.choices(range(dataset.n), weights=iw,
                k=dataset.n)
        mloss = paddle.zeros(shape=[3])  # running mean of the loss items
        if RANK != -1:
            train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(train_loader)
        LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj',
            'cls', 'labels', 'img_size'))
        if RANK in [-1, 0]:
            pbar = tqdm(pbar, total=nb)
        optimizer.clear_gradients(set_to_zero=False)
        for i, (imgs, targets, paths, _) in pbar:
            ni = i + nb * epoch  # batches processed since training start
            imgs = imgs.to(device, blocking=False).astype(dtype='float32'
                ) / 255.0

            # Warm-up
            if ni <= nw:
                xi = [0, nw]
                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]
                    ).round())
                # NOTE(review): `param_groups` with 'lr'/'initial_lr' keys is
                # the torch optimizer interface; confirm the Paddle optimizer
                # is given a compatible shim or port this warm-up.
                for j, x in enumerate(optimizer.param_groups):
                    # Bias lr falls from warmup_bias_lr, others rise from 0.
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j ==
                        2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp[
                            'warmup_momentum'], hyp['momentum']])

            # Multi-scale: pick a size in +/-50% of imgsz, multiple of gs.
            if opt.multi_scale:
                # randrange needs integer bounds (floats raise on 3.12+).
                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs
                    ) // gs * gs
                sf = sz / max(tuple(imgs.shape)[2:])
                if sf != 1:
                    ns = [(math.ceil(x * sf / gs) * gs) for x in tuple(imgs
                        .shape)[2:]]
                    imgs = paddle.nn.functional.interpolate(x=imgs, size=ns,
                        mode='bilinear', align_corners=False)

            # Forward
            with paddle.amp.auto_cast(enable=cuda):
                pred = model(imgs)
                loss, loss_items = compute_loss(pred, targets.to(device))
                if RANK != -1:
                    loss *= WORLD_SIZE
                if opt.quad:
                    loss *= 4.0

            # Backward
            scaler.scale(loss).backward()

            # Optimizer step once `accumulate` batches have been seen.
            if ni - last_opt_step >= accumulate:
                scaler.step(optimizer)
                scaler.update()
                optimizer.clear_gradients(set_to_zero=False)
                if ema:
                    ema.update(model)
                last_opt_step = ni

            # Progress reporting (main process only)
            if RANK in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)
                mem = (
                    f'{paddle.device.cuda.memory_reserved() / 1000000000.0 if paddle.device.cuda.device_count() >= 1 else 0:.3g}G'
                    )
                pbar.set_description(('%10s' * 2 + '%10.4g' * 5) % (
                    f'{epoch + 1}/{epochs}', mem, *mloss, tuple(targets.
                    shape)[0], tuple(imgs.shape)[-1]))
                callbacks.run('on_train_batch_end', ni, model, imgs,
                    targets, paths, plots, opt.sync_bn)

        # End of epoch: record learning rates, then advance the scheduler.
        # NOTE(review): same torch-style `param_groups` dependency as above.
        lr = [x['lr'] for x in optimizer.param_groups]
        scheduler.step()
        if RANK in [-1, 0]:
            callbacks.run('on_train_epoch_end', epoch=epoch)
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names',
                'stride', 'class_weights'])
            final_epoch = epoch + 1 == epochs or stopper.possible_stop
            if not noval or final_epoch:
                results, maps, _ = val.run(data_dict, batch_size=batch_size //
                    WORLD_SIZE * 2, imgsz=imgsz, model=ema.ema, single_cls=
                    single_cls, dataloader=val_loader, save_dir=save_dir,
                    verbose=True, plots=False, callbacks=callbacks,
                    compute_loss=compute_loss)

            # Track the best fitness seen so far.
            fi = fitness(np.array(results).reshape(1, -1))
            if fi > best_fitness:
                best_fitness = fi
            log_vals = list(mloss) + list(results) + lr
            callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi
                )

            # Save checkpoints: every epoch unless --nosave, and always on
            # the final epoch when not evolving.
            if not nosave or final_epoch and not evolve:
                # Layers are cast with astype (as elsewhere in this
                # function) rather than the torch-only .half().
                ckpt = {'epoch': epoch, 'best_fitness': best_fitness,
                    'model': deepcopy(de_parallel(model)).astype(dtype=
                    'float16'), 'ema': deepcopy(ema.ema).astype(dtype=
                    'float16'), 'updates': ema.updates,
                    'optimizer': optimizer.state_dict(), 'wandb_id':
                    loggers.wandb.wandb_run.id if loggers.wandb else None}
                paddle.save(obj=ckpt, path=last)
                if best_fitness == fi:
                    paddle.save(obj=ckpt, path=best)
                if (epoch > 0 and opt.save_period > 0 and epoch % opt.
                    save_period == 0):
                    paddle.save(obj=ckpt, path=w / f'epoch{epoch}.pt')
                del ckpt
                callbacks.run('on_model_save', last, epoch, final_epoch,
                    best_fitness, fi)

            # Early stopping is only evaluated in single-process runs.
            if RANK == -1 and stopper(epoch=epoch, fitness=fi):
                break

    # End of training: strip optimizers and validate the best checkpoint.
    if RANK in [-1, 0]:
        LOGGER.info(
            f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.'
            )
        for f in (last, best):
            if f.exists():
                strip_optimizer(f)
                if f is best:
                    LOGGER.info(f'\nValidating {f}...')
                    results, _, _ = val.run(data_dict, batch_size=
                        batch_size // WORLD_SIZE * 2, imgsz=imgsz, model=
                        attempt_load(f, device).astype(dtype='float16'),
                        iou_thres=0.65 if is_coco else 0.6, single_cls=
                        single_cls, dataloader=val_loader, save_dir=
                        save_dir, save_json=is_coco, verbose=True, plots=
                        True, callbacks=callbacks, compute_loss=compute_loss)
                    if is_coco:
                        callbacks.run('on_fit_epoch_end', list(mloss) +
                            list(results) + lr, epoch, best_fitness, fi)
        callbacks.run('on_train_end', last, best, plots, epoch, results)
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
    paddle.device.cuda.empty_cache()
    return results


def parse_opt(known=False):
    """Build the training command-line parser and parse the arguments.

    Args:
        known: When true, unrecognised arguments are ignored
            (``parse_known_args``); otherwise they are an error.

    Returns:
        argparse.Namespace holding the parsed options.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # Paths and core training settings
    add('--weights', type=str, default=ROOT / 'yolov5x.pt',
        help='initial weights path')
    add('--cfg', type=str, default=ROOT / 'models/yolov5x.yaml',
        help='model.yaml path')
    add('--data', type=str, default=ROOT / 'data/VOC_2classes.yaml',
        help='dataset.yaml path')
    add('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml',
        help='hyperparameters path')
    add('--epochs', type=int, default=500)
    add('--batch-size', type=int, default=18,
        help='total batch size for all GPUs, -1 for autobatch')
    add('--imgsz', '--img', '--img-size', type=int, default=1024,
        help='train, val image size (pixels)')
    add('--rect', action='store_true', help='rectangular training')
    add('--resume', nargs='?', const=True, default=False,
        help='resume most recent training')
    add('--nosave', action='store_true', help='only save final checkpoint')
    add('--noval', action='store_true', help='only validate final epoch')
    add('--noautoanchor', action='store_true',
        help='disable autoanchor check')
    add('--evolve', type=int, nargs='?', const=200, default=False,
        help='evolve hyperparameters for x generations')
    add('--bucket', type=str, default='', help='gsutil bucket')
    add('--cache', type=str, nargs='?', const='ram',
        help='--cache images in "ram" (default) or "disk"')
    add('--image-weights', action='store_true',
        help='use weighted image selection for training')
    add('--device', default='1',
        help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    add('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    add('--single-cls', default=False, action='store_true',
        help='train multi-class data as single-class')
    # `store_false` means Adam is ON by default and passing --adam turns it
    # OFF. The flag semantics are kept (flipping them would silently change
    # the default optimizer of existing commands); the help text now states
    # what the flag actually does.
    add('--adam', action='store_false',
        help='disable the default Adam optimizer and train with SGD instead')
    add('--sync-bn', action='store_true',
        help='use SyncBatchNorm, only available in DDP mode')
    add('--workers', type=int, default=8,
        help='maximum number of dataloader workers')
    add('--project', default=ROOT / 'runs/train',
        help='save to project/name')
    add('--name', default='exp', help='save to project/name')
    add('--exist-ok', action='store_true',
        help='existing project/name ok, do not increment')
    add('--quad', action='store_true', help='quad dataloader')
    add('--linear-lr', action='store_true', help='linear LR')
    add('--label-smoothing', type=float, default=0.0,
        help='Label smoothing epsilon')
    add('--patience', type=int, default=75,
        help='EarlyStopping patience (epochs without improvement)')
    add('--freeze', type=int, default=0,
        help='Number of layers to freeze. backbone=10, all=24')
    add('--save-period', type=int, default=-1,
        help='Save checkpoint every x epochs (disabled if < 1)')
    add('--local_rank', type=int, default=-1,
        help='DDP parameter, do not modify')
    add('--cawb_steps', nargs='+', type=int,
        default=[25, 75, 125, 175, 225, 275, 325, 375, 425, 475],
        help='the cawb learning rate scheduler steps')

    # Weights & Biases options
    add('--entity', default=None, help='W&B: Entity')
    add('--upload_dataset', action='store_true',
        help='W&B: Upload dataset as artifact table')
    add('--bbox_interval', type=int, default=-1,
        help='W&B: Set bounding-box image logging interval')
    add('--artifact_alias', type=str, default='latest',
        help='W&B: Version of dataset artifact to use')

    opt = parser.parse_known_args()[0] if known else parser.parse_args()
    return opt


def main(opt, callbacks=None):
    """Validate options, set up the device and run training or evolution.

    Args:
        opt: Options namespace (as produced by ``parse_opt``). When resuming,
            it is replaced by the options stored in the run's ``opt.yaml``.
        callbacks: Callback registry passed on to ``train``. A fresh
            ``Callbacks()`` is created per call when omitted, so hooks
            registered by one call do not leak into the next.

    Raises:
        AssertionError: If the resume checkpoint is missing, neither cfg nor
            weights is given, or the options are incompatible with DDP.
    """
    if callbacks is None:
        callbacks = Callbacks()

    # Checks
    set_logging(RANK)
    if RANK in [-1, 0]:
        print_args(FILE.stem, opt)
        check_requirements(exclude=['thop'])

    # Resume an interrupted run, or normalise the paths for a new one.
    if opt.resume and not check_wandb_resume(opt) and not opt.evolve:
        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()
        assert os.path.isfile(ckpt
            ), 'ERROR: --resume checkpoint does not exist'
        # The original options live two levels above the checkpoint file.
        with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f:
            opt = argparse.Namespace(**yaml.safe_load(f))
        opt.cfg, opt.weights, opt.resume = '', ckpt, True
        LOGGER.info(f'Resuming training from {ckpt}')
    else:
        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = check_file(opt
            .data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights
            ), str(opt.project)
        assert len(opt.cfg) or len(opt.weights
            ), 'either --cfg or --weights must be specified'
        if opt.evolve:
            opt.project = str(ROOT / 'runs/evolve')
            # During evolution --resume is reinterpreted as exist_ok.
            opt.exist_ok, opt.resume = opt.resume, False
        opt.save_dir = str(increment_path(Path(opt.project) / opt.name,
            exist_ok=opt.exist_ok))

    # Device selection. The option is named `device` (see parse_opt); the
    # namespace has no `place` attribute.
    device = select_device(opt.device, batch_size=opt.batch_size)
    if LOCAL_RANK != -1:
        assert paddle.device.cuda.device_count(
            ) > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count'
        assert not opt.image_weights, '--image-weights argument is not compatible with DDP training'
        assert not opt.evolve, '--evolve argument is not compatible with DDP training'
        # LOCAL_RANK is always an int here, so the device string is direct.
        paddle.device.set_device(device=f'gpu:{LOCAL_RANK}')
        device = paddle.CUDAPlace(LOCAL_RANK)
        paddle.distributed.init_parallel_env()

    # Plain training
    if not opt.evolve:
        train(opt.hyp, opt, device, callbacks)
        if WORLD_SIZE > 1 and RANK == 0:
            LOGGER.info('Destroying process group... ')
            paddle.distributed.destroy_process_group()
        return

    # Hyperparameter evolution. Each entry is
    # (mutation scale, lower limit, upper limit); a scale of 0 is never
    # mutated because it zeroes the perturbation below.
    meta = {
        'lr0': (1, 1e-05, 0.1),
        'lrf': (1, 0.01, 1.0),
        'momentum': (0.3, 0.6, 0.98),
        'weight_decay': (1, 0.0, 0.001),
        'warmup_epochs': (1, 0.0, 5.0),
        'warmup_momentum': (1, 0.0, 0.95),
        'warmup_bias_lr': (1, 0.0, 0.2),
        'box': (1, 0.02, 0.2),
        'cls': (1, 0.2, 4.0),
        'cls_pw': (1, 0.5, 2.0),
        'obj': (1, 0.2, 4.0),
        'obj_pw': (1, 0.5, 2.0),
        'iou_t': (0, 0.1, 0.7),
        'anchor_t': (1, 2.0, 8.0),
        'anchors': (2, 2.0, 10.0),
        'fl_gamma': (0, 0.0, 2.0),
        'hsv_h': (1, 0.0, 0.1),
        'hsv_s': (1, 0.0, 0.9),
        'hsv_v': (1, 0.0, 0.9),
        'degrees': (1, 0.0, 45.0),
        'translate': (1, 0.0, 0.9),
        'scale': (1, 0.0, 0.9),
        'shear': (1, 0.0, 10.0),
        'perspective': (0, 0.0, 0.001),
        'flipud': (1, 0.0, 1.0),
        'fliplr': (0, 0.0, 1.0),
        'mosaic': (1, 0.0, 1.0),
        'mixup': (1, 0.0, 1.0),
        'copy_paste': (1, 0.0, 1.0),
    }
    with open(opt.hyp, errors='ignore') as f:
        hyp = yaml.safe_load(f)
        if 'anchors' not in hyp:
            hyp['anchors'] = 3
    # Each generation validates and saves only at its final epoch.
    opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)
    evolve_yaml, evolve_csv = (save_dir / 'hyp_evolve.yaml', save_dir /
        'evolve.csv')
    if opt.bucket:
        # NOTE: opt.bucket is interpolated into a shell command; only pass
        # trusted bucket names.
        os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {save_dir}')
    for _ in range(opt.evolve):
        if evolve_csv.exists():
            # Select the parent from the (up to) five fittest previous runs.
            parent = 'single'
            x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
            n = min(5, len(x))
            x = x[np.argsort(-fitness(x))][:n]
            w = fitness(x) - fitness(x).min() + 1e-06
            if parent == 'single' or len(x) == 1:
                x = x[random.choices(range(n), weights=w)[0]]
            elif parent == 'weighted':
                x = (x * w.reshape(n, 1)).sum(axis=0) / w.sum()

            # Mutate: mp is the per-gene mutation probability, s the sigma.
            mp, s = 0.8, 0.2
            npr = np.random
            npr.seed(int(time.time()))
            g = np.array([meta[k][0] for k in hyp.keys()])
            ng = len(meta)
            v = np.ones(ng)
            while all(v == 1):  # retry until at least one gene changes
                v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.
                    random() * s + 1).clip(0.3, 3.0)
            # The first seven CSV columns are skipped; hyperparameter
            # values start at column 7.
            for i, k in enumerate(hyp.keys()):
                hyp[k] = float(x[i + 7] * v[i])

        # Clamp every hyperparameter to its limits.
        for k, v in meta.items():
            hyp[k] = max(hyp[k], v[1])
            hyp[k] = min(hyp[k], v[2])
            hyp[k] = round(hyp[k], 5)
        results = train(hyp.copy(), opt, device, callbacks)
        print_mutation(results, hyp.copy(), save_dir, opt.bucket)
    plot_evolve(evolve_csv)
    print(
        'Hyperparameter evolution finished\n'
        f"Results saved to {colorstr('bold', save_dir)}\n"
        f'Use best hyperparameters example: $ python train.py --hyp {evolve_yaml}'
        )


def run(**kwargs):
    """Programmatic entry point: train with keyword overrides.

    Options are parsed with unknown command-line arguments ignored, each
    keyword argument is then set as an attribute on the resulting
    namespace, and ``main`` is invoked with it.
    """
    options = parse_opt(known=True)
    for key in kwargs:
        setattr(options, key, kwargs[key])
    main(options)


if __name__ == '__main__':
    # Script entry point: strict command-line parsing, then hand over to main.
    opt = parse_opt(known=False)
    main(opt=opt)
